// Original congressional district-level 2019 unemployment data imported from https://data.census.gov/table/ACSST1Y2019.S2301?q=S2301&g=010XX00US$5000000&y=2019
* ---------------------------------------------------------------------------
* Load the 2019 congressional-district ACS S2301 extract, stack the 2021
* extract underneath it, and make the Black unemployment-rate column numeric.
* ---------------------------------------------------------------------------

* Close any log left open by an earlier (possibly crashed) run so that
* -log using- below does not stop with "log file already open".
capture log close
log using VoterSurveyUnemployment.log, replace

* -clear- lets the script be re-run while other data are still in memory.
use "C:\Users\sbstjp\OneDrive - Cardiff University\2019ConDisUnempData.dta", clear

* Per the original author's note, the file holds two copies of roughly 437
* observations; rows 438-874 are the second copy and are removed here.
* NOTE(review): this relies on the file having exactly 874 rows in this order.
drop in 438/874

* Keep only the identifiers and the three estimate columns used below
keep geo_id name s2301_c01_028e s2301_c04_001e s2301_c04_013e

* Tag these rows as 2019; the new column is placed before the first estimate
generate Year = 2019, before(s2301_c01_028e)

* Append the 2021 extract.
* NOTE(review): the original author states the same preparation was applied to
* the 2021 file; the code below assumes it carries Year == 2021 - confirm.
append using "C:\Users\sbstjp\OneDrive - Cardiff University\2021ConDisUnempData.dta"

* "N" marks a value that is not available; turn it into "." so that
* -destring- reads it as numeric missing.
replace s2301_c04_013e = "." if s2301_c04_013e == "N"

* Convert the variable from string to numeric
destring s2301_c04_013e, replace

* -destring- leaves the variable as a string (without raising an error) when
* any other non-numeric text remains, so stop here with a clear message
* rather than failing later in the arithmetic.
confirm numeric variable s2301_c04_013e

* Sort by district and year for the by-group calculations that follow
sort geo_id Year

* Percentage change from each 2019 row to the next row of the same geo_id
* (ordered by Year), computed for the three estimate columns. Rows that are
* not Year == 2019 are left missing.
foreach est in s2301_c01_028e s2301_c04_001e s2301_c04_013e {
    bysort geo_id (Year): gen pct_change_`est' = 100 * (`est'[_n+1] - `est') / `est' if Year == 2019
}

* Only the 2019 rows carry the change figures; discard the 2021 rows and the
* year marker, which is no longer needed.
keep if Year != 2021
drop Year

// Generate a district key (state abbreviation + two-digit district number)
// that can be matched with the Voter Survey, then save the district file.

* The last four characters of geo_id are used as: two-digit state FIPS code
* followed by a two-digit district number.
gen state_fips = substr(geo_id, -4, 2)
gen cd_number  = substr(geo_id, -2, 2)

* Map state FIPS codes to postal abbreviations (the 50 states)
gen state_abbrev = ""
foreach pair in "01 AL" "02 AK" "04 AZ" "05 AR" "06 CA" "08 CO" "09 CT" "10 DE" ///
                "12 FL" "13 GA" "15 HI" "16 ID" "17 IL" "18 IN" "19 IA" "20 KS" ///
                "21 KY" "22 LA" "23 ME" "24 MD" "25 MA" "26 MI" "27 MN" "28 MS" ///
                "29 MO" "30 MT" "31 NE" "32 NV" "33 NH" "34 NJ" "35 NM" "36 NY" ///
                "37 NC" "38 ND" "39 OH" "40 OK" "41 OR" "42 PA" "44 RI" "45 SC" ///
                "46 SD" "47 TN" "48 TX" "49 UT" "50 VT" "51 VA" "53 WA" "54 WV" ///
                "55 WI" "56 WY" {
    local fips : word 1 of `pair'
    local abbr : word 2 of `pair'
    replace state_abbrev = "`abbr'" if state_fips == "`fips'"
}

* Combine state abbreviation with congressional district number
gen congress_district = state_abbrev + cd_number

* For the later merge: the District of Columbia (FIPS 11) and Puerto Rico
* (FIPS 72) are not in the table above, so give them fixed keys. Selecting
* them by FIPS code instead of by observation number (previously rows 88 and
* 437) keeps this correct if the row order or count of the input changes.
replace congress_district = "DC00" if state_fips == "11"
replace congress_district = "PR00" if state_fips == "72"

* Drop the empty observation (previously removed as "observation 1").
* NOTE(review): assumes the empty row has a blank geo_id - confirm.
drop if trim(geo_id) == ""

drop state_fips cd_number state_abbrev

* The key must be unique for the m:1 merge that uses this file; fail here
* with a clear message rather than at the merge.
isid congress_district, missok

* Save as a new file; -replace- allows the script to be re-run
save "C:\Users\sbstjp\OneDrive - Cardiff University\RevisedConDisUnempData.dta", replace

// Work with Voter Survey dataset
* Source: Democracy Fund Voter Study Group. VIEWS OF THE ELECTORATE RESEARCH
* SURVEY. Washington, D.C. https://www.voterstudygroup.org/.
* Date accessed: March 09, 2025.
use "C:\Users\sbstjp\OneDrive - Cardiff University\voter_panel.dta", clear

// Create a congress_district variable which can be matched
* Translate the numeric state code in inputstate_2020Nov (FIPS numbering) into
* a postal abbreviation. Code 11 (District of Columbia) is included so that
* DC respondents can form the "DC00" key that the district file uses.
* NOTE(review): DC will only match if its cdid_2020Nov pads to "00" - confirm
* how the survey codes DC and at-large districts.
gen state_abbreviation = ""
foreach pair in "1 AL"  "2 AK"  "4 AZ"  "5 AR"  "6 CA"  "8 CO"  "9 CT"  "10 DE" ///
                "11 DC" "12 FL" "13 GA" "15 HI" "16 ID" "17 IL" "18 IN" "19 IA" ///
                "20 KS" "21 KY" "22 LA" "23 ME" "24 MD" "25 MA" "26 MI" "27 MN" ///
                "28 MS" "29 MO" "30 MT" "31 NE" "32 NV" "33 NH" "34 NJ" "35 NM" ///
                "36 NY" "37 NC" "38 ND" "39 OH" "40 OK" "41 OR" "42 PA" "44 RI" ///
                "45 SC" "46 SD" "47 TN" "48 TX" "49 UT" "50 VT" "51 VA" "53 WA" ///
                "54 WV" "55 WI" "56 WY" {
    local code : word 1 of `pair'
    local abbr : word 2 of `pair'
    replace state_abbreviation = "`abbr'" if inputstate_2020Nov == `code'
}

* Left-pad the (string) district number with a zero and keep the last two
* characters, so "1" becomes "01" and "12" stays "12".
replace cdid_2020Nov = substr("0" + cdid_2020Nov, -2, .)

* Verify: print the padded district numbers to the log
list cdid_2020Nov if !missing(cdid_2020Nov)

* Any value that is still a bare "0" after padding is set to the string ".".
* NOTE(review): presumably this targets respondents with no district recorded;
* the result is the text ".", not a Stata missing value - confirm intent.
replace cdid_2020Nov = "." if trim(cdid_2020Nov) == "0"

* Key = state abbreviation followed by the two-character district number
gen congress_district = state_abbreviation + cdid_2020Nov

// Create social justice scale
* Set out-of-range response codes to missing
replace ft_blm_2020Sep = . if ft_blm_2020Sep > 100
replace reparations_2020Sep = . if reparations_2020Sep > 2
replace defundpolice_2020Sep = . if defundpolice_2020Sep > 2
replace police_threat_2020Sep = . if police_threat_2020Sep > 2
replace usa_founders_2020Sep = . if usa_founders_2020Sep > 2
replace internetharass_dem_2020Sep = . if internetharass_dem_2020Sep == 9

* Reverse coding so social justice values are coded high:
* new value = (largest observed value) - (original value)
foreach var in reparations_2020Sep defundpolice_2020Sep usa_founders_2020Sep {
    quietly summarize `var'
    gen r`var' = r(max) - `var'
}

* Rescale every item to run from 1 (observed minimum) to 2 (observed maximum)
foreach var in ft_blm_2020Sep rreparations_2020Sep rdefundpolice_2020Sep rusa_founders_2020Sep police_threat_2020Sep internetharass_dem_2020Sep {
    summarize `var'
    gen s`var' = 1 + (`var' - r(min)) / (r(max) - r(min))
}

* At this stage, the scale has a Cronbach's alpha of 0.83 (author's note)

* Scale score = mean of the items each respondent actually answered.
* -egen- row functions skip missing items, so the standardised items keep
* their missing values instead of being overwritten with 0.
local sjv_items sft_blm_2020Sep srreparations_2020Sep srdefundpolice_2020Sep ///
    srusa_founders_2020Sep spolice_threat_2020Sep sinternetharass_dem_2020Sep

* Sum of the answered items (missing items contribute 0 to the sum)
egen total_scoreSJV = rowtotal(`sjv_items')

* Number of answered items (name kept from the earlier implementation)
egen count_nonzeroSJV = rownonmiss(`sjv_items')

* Average score; left missing when no item was answered (avoids dividing by 0)
gen SocJusValues = total_scoreSJV / count_nonzeroSJV if count_nonzeroSJV > 0

// Merge
* Attach the district-level file to each respondent (many respondents per
* district) using the congress_district key.
merge m:1 congress_district using "C:\Users\sbstjp\OneDrive - Cardiff University\RevisedConDisUnempData.dta"

* Give the change variables readable names (descriptions are the author's):
*   totalunempratechange      - change in total unemployment rate, 2019-21
*   blackunempchange          - change in Black unemployment rate, 2019-21
*   belowpovlineinlast12mnths - change in % of population below the poverty
*                               line in the last 12 months, 2019-21
rename (pct_change_s2301_c04_001e pct_change_s2301_c04_013e pct_change_s2301_c01_028e) ///
       (totalunempratechange      blackunempchange          belowpovlineinlast12mnths)

// Exploratory analysis
* Weighted pairwise correlations, with significance levels, between the scale
* and the three district-level change measures.
pwcorr SocJusValues totalunempratechange belowpovlineinlast12mnths blackunempchange [aweight=weight_genpop_2020Sep], sig

log close
